How I fixed a really broken CentOS Machine after an dist upgrade

So… you do basic dist upgrade.. you don’t really make an effort to take a cloud-server image (because your me, and are an idiot! and dont even follow the advice you give your own customers, yourself!)…

Now I’ve got that bit over.. I basically found that udev wasn’t installed. This might not seem like completely the end of the world, however, the fact that /dev/random /dev/urandom were missing, and /dev/null was a regular file.. we had big issues man.. I didn’t think I’d be able to get SSH to behave again, but actually it seems this is isolated to udev in most latest centos revisions.

I’m not going to give my kernel version, but you get the idea ;-D

[root@RESCUE-pirax-test ~]# ls -al
total 40
dr-xr-x---.  5 root root 4096 Dec  7 09:34 .
dr-xr-xr-x. 18 root root 4096 Dec  7 09:34 ..
-rw-r--r--.  1 root root   18 Dec 29  2013 .bash_logout
-rw-r--r--.  1 root root  176 Dec 29  2013 .bash_profile
-rw-r--r--.  1 root root  176 Dec 29  2013 .bashrc
drwxr-xr-x.  3 root root 4096 Dec  7 09:34 .cache
drwxr-xr-x.  3 root root 4096 Dec  7 09:34 .config
-rw-r--r--.  1 root root  100 Dec 29  2013 .cshrc
drwx------.  2 root root 4096 Dec  7 09:34 .ssh
-rw-r--r--.  1 root root  129 Dec 29  2013 .tcshrc
[root@RESCUE-pirax-test ~]# cd /mnt
[root@RESCUE-pirax-test mnt]# ls -al
total 112
dr-xr-xr-x. 23 root root  4096 Dec  7 09:32 .
dr-xr-xr-x. 18 root root  4096 Dec  7 09:34 ..
lrwxrwxrwx.  1 root root     7 Feb 23  2016 bin -> usr/bin
dr-xr-xr-x.  4 root root  4096 Dec  6 11:53 boot
drwxr-xr-x.  2 root root  4096 Apr 21  2016 customer
drwxr-xr-x.  2 root root  4096 Dec  7 09:47 dev
drwxr-xr-x.  8 root root  4096 Jun 10 10:02 documents
drwxr-xr-x.  3 root root  4096 Mar  1  2016 dump
drwxr-xr-x. 91 root root 12288 Dec  7 09:45 etc
drwxr-xr-x.  5 root root  4096 Nov 11 11:58 home
drwxr-xr-x.  3 root root  4096 Nov  9  2015 include
lrwxrwxrwx.  1 root root     7 Feb 23  2016 lib -> usr/lib
lrwxrwxrwx.  1 root root     9 Feb 23  2016 lib64 -> usr/lib64
drwx------.  2 root root 16384 Sep  3  2015 lost+found
drwxr-xr-x.  2 root root  4096 Aug 12  2015 media
drwxr-xr-x.  3 root root  4096 Jun 23 14:23 mnt
drwxr-xr-x.  4 root root  4096 Aug 12  2015 opt
drwxr-xr-x.  2 root root  4096 Sep  3  2015 proc
drwxr-xr-x. 26 root root  4096 Dec  7 09:36 root
drwxr-xr-x.  3 root root  4096 Dec  7 09:43 run
lrwxrwxrwx.  1 root root     8 Feb 23  2016 sbin -> usr/sbin
drwxr-xr-x.  2 root root  4096 Aug 12  2015 srv
drwxr-xr-x.  2 root root  4096 Sep  3  2015 sys
drwxrwxrwt.  7 root root  4096 Dec  7 09:46 tmp
drwxr-xr-x. 13 root root  4096 Feb 23  2016 usr
drwxr-xr-x. 22 root root  4096 Dec  7 09:31 var
drwxr-xr-x.  9 root root  4096 Oct 12  2015 wpscan
[root@RESCUE-pirax-test mnt]# cd ..
[root@RESCUE-pirax-test /]# chroot /mnt
[root@RESCUE-pirax-test /]# ls -al
total 112
dr-xr-xr-x. 23 root root  4096 Dec  7 09:32 .
dr-xr-xr-x. 23 root root  4096 Dec  7 09:32 ..
lrwxrwxrwx.  1 root root     7 Feb 23  2016 bin -> usr/bin
dr-xr-xr-x.  4 root root  4096 Dec  6 11:53 boot
drwxr-xr-x.  2 root root  4096 Apr 21  2016 customer
drwxr-xr-x.  2 root root  4096 Dec  7 09:47 dev
drwxr-xr-x.  8 root root  4096 Jun 10 10:02 documents
drwxr-xr-x.  3 root root  4096 Mar  1  2016 dump
drwxr-xr-x. 91 root root 12288 Dec  7 09:45 etc
drwxr-xr-x.  5 root root  4096 Nov 11 11:58 home
drwxr-xr-x.  3 root root  4096 Nov  9  2015 include
lrwxrwxrwx.  1 root root     7 Feb 23  2016 lib -> usr/lib
lrwxrwxrwx.  1 root root     9 Feb 23  2016 lib64 -> usr/lib64
drwx------.  2 root root 16384 Sep  3  2015 lost+found
drwxr-xr-x.  2 root root  4096 Aug 12  2015 media
drwxr-xr-x.  3 root root  4096 Jun 23 14:23 mnt
drwxr-xr-x.  4 root root  4096 Aug 12  2015 opt
drwxr-xr-x.  2 root root  4096 Sep  3  2015 proc
drwxr-xr-x. 26 root root  4096 Dec  7 09:36 root
drwxr-xr-x.  3 root root  4096 Dec  7 09:43 run
lrwxrwxrwx.  1 root root     8 Feb 23  2016 sbin -> usr/sbin
drwxr-xr-x.  2 root root  4096 Aug 12  2015 srv
drwxr-xr-x.  2 root root  4096 Sep  3  2015 sys
drwxrwxrwt.  7 root root  4096 Dec  7 09:46 tmp
drwxr-xr-x. 13 root root  4096 Feb 23  2016 usr
drwxr-xr-x. 22 root root  4096 Dec  7 09:31 var
drwxr-xr-x.  9 root root  4096 Oct 12  2015 wpscan
[root@RESCUE-pirax-test /]# su adam
[adam@RESCUE-pirax-test /]$ ssh root@localhost
cannot read from /dev/urandom, No such file or directory
[adam@RESCUE-pirax-test /]$ yum update mkinitrd
Loaded plugins: fastestmirror, langpacks
You need to be root to perform this command.
[adam@RESCUE-pirax-test /]$ exit
exit
[root@RESCUE-pirax-test /]# yum update mkinitrd
Loaded plugins: fastestmirror, langpacks
Loading mirror speeds from cached hostfile
 * base: mirrors.vooservers.com
 * epel: epel.check-update.co.uk
 * extras: mirrors.vooservers.com
 * updates: mirrors.vooservers.com
No Match for argument: mkinitrd
No package mkinitrd available.
No packages marked for update
[root@RESCUE-pirax-test /]# yum provides initrd
Loaded plugins: fastestmirror, langpacks
Loading mirror speeds from cached hostfile
 * base: mirrors.vooservers.com
 * epel: epel.check-update.co.uk
 * extras: mirrors.vooservers.com
 * updates: mirrors.vooservers.com
No matches found
[root@RESCUE-pirax-test /]# cd /dev
[root@RESCUE-pirax-test dev]# /sbin/MAKEDEV urandom;
bash: /sbin/MAKEDEV: No such file or directory
[root@RESCUE-pirax-test dev]# yum install udev
Loaded plugins: fastestmirror, langpacks
Loading mirror speeds from cached hostfile
 * base: mirrors.vooservers.com
 * epel: epel.check-update.co.uk
 * extras: mirrors.vooservers.com
 * updates: mirrors.vooservers.com
Resolving Dependencies
--> Running transaction check
---> Package systemd.x86_64 0:219-19.el7_2.4 will be updated
--> Processing Dependency: systemd = 219-19.el7_2.4 for package: systemd-python-219-19.el7_2.4.x86_64
--> Processing Dependency: systemd = 219-19.el7_2.4 for package: systemd-sysv-219-19.el7_2.4.x86_64
---> Package systemd.x86_64 0:219-19.el7_2.13 will be an update
--> Processing Dependency: systemd-libs = 219-19.el7_2.13 for package: systemd-219-19.el7_2.13.x86_64
--> Running transaction check
---> Package systemd-libs.x86_64 0:219-19.el7_2.4 will be updated
--> Processing Dependency: systemd-libs = 219-19.el7_2.4 for package: libgudev1-219-19.el7_2.4.x86_64
---> Package systemd-libs.x86_64 0:219-19.el7_2.13 will be an update
---> Package systemd-python.x86_64 0:219-19.el7_2.4 will be updated
---> Package systemd-python.x86_64 0:219-19.el7_2.13 will be an update
---> Package systemd-sysv.x86_64 0:219-19.el7_2.4 will be updated
---> Package systemd-sysv.x86_64 0:219-19.el7_2.13 will be an update
--> Running transaction check
---> Package libgudev1.x86_64 0:219-19.el7_2.4 will be updated
---> Package libgudev1.x86_64 0:219-19.el7_2.13 will be an update
--> Finished Dependency Resolution

Dependencies Resolved

=========================================================================================================================================================================================================================================================================
 Package                                                             Arch                                                        Version                                                              Repository                                                    Size
=========================================================================================================================================================================================================================================================================
Updating:
 systemd                                                             x86_64                                                      219-19.el7_2.13                                                      updates                                                      5.1 M
Updating for dependencies:
 libgudev1                                                           x86_64                                                      219-19.el7_2.13                                                      updates                                                       67 k
 systemd-libs                                                        x86_64                                                      219-19.el7_2.13                                                      updates                                                      358 k
 systemd-python                                                      x86_64                                                      219-19.el7_2.13                                                      updates                                                      100 k
 systemd-sysv                                                        x86_64                                                      219-19.el7_2.13                                                      updates                                                       54 k

Transaction Summary
=========================================================================================================================================================================================================================================================================
Upgrade  1 Package (+4 Dependent packages)

Total size: 5.7 M
Is this ok [y/d/N]: y
Downloading packages:
Running transaction check
Running transaction test
Transaction test succeeded
Running transaction
  Updating   : systemd-libs-219-19.el7_2.13.x86_64                                                                                                                                                                                                                  1/10
  Updating   : systemd-219-19.el7_2.13.x86_64                                                                                                                                                                                                                       2/10
  Updating   : systemd-sysv-219-19.el7_2.13.x86_64                                                                                                                                                                                                                  3/10
  Updating   : systemd-python-219-19.el7_2.13.x86_64                                                                                                                                                                                                                4/10
  Updating   : libgudev1-219-19.el7_2.13.x86_64                                                                                                                                                                                                                     5/10
  Cleanup    : systemd-sysv-219-19.el7_2.4.x86_64                                                                                                                                                                                                                   6/10
  Cleanup    : systemd-python-219-19.el7_2.4.x86_64                                                                                                                                                                                                                 7/10
  Cleanup    : systemd-219-19.el7_2.4.x86_64                                                                                                                                                                                                                        8/10
  Cleanup    : libgudev1-219-19.el7_2.4.x86_64                                                                                                                                                                                                                      9/10
  Cleanup    : systemd-libs-219-19.el7_2.4.x86_64                                                                                                                                                                                                                  10/10
  Verifying  : systemd-libs-219-19.el7_2.13.x86_64                                                                                                                                                                                                                  1/10
  Verifying  : systemd-sysv-219-19.el7_2.13.x86_64                                                                                                                                                                                                                  2/10
  Verifying  : systemd-219-19.el7_2.13.x86_64                                                                                                                                                                                                                       3/10
  Verifying  : systemd-python-219-19.el7_2.13.x86_64                                                                                                                                                                                                                4/10
  Verifying  : libgudev1-219-19.el7_2.13.x86_64                                                                                                                                                                                                                     5/10
  Verifying  : systemd-libs-219-19.el7_2.4.x86_64                                                                                                                                                                                                                   6/10
  Verifying  : systemd-sysv-219-19.el7_2.4.x86_64                                                                                                                                                                                                                   7/10
  Verifying  : systemd-219-19.el7_2.4.x86_64                                                                                                                                                                                                                        8/10
  Verifying  : libgudev1-219-19.el7_2.4.x86_64                                                                                                                                                                                                                      9/10
  Verifying  : systemd-python-219-19.el7_2.4.x86_64                                                                                                                                                                                                                10/10

Updated:
  systemd.x86_64 0:219-19.el7_2.13

Dependency Updated:
  libgudev1.x86_64 0:219-19.el7_2.13                             systemd-libs.x86_64 0:219-19.el7_2.13                             systemd-python.x86_64 0:219-19.el7_2.13                             systemd-sysv.x86_64 0:219-19.el7_2.13

Complete!
[root@RESCUE-pirax-test dev]# su adam
[adam@RESCUE-pirax-test dev]$ ssh root@localhost
cannot read from /dev/urandom, No such file or directory
[adam@RESCUE-pirax-test dev]$ /sbin/
Display all 526 possibilities? (y or n)
[adam@RESCUE-pirax-test dev]$ /sbin/MAKEDEV std
bash: /sbin/MAKEDEV: No such file or directory
[adam@RESCUE-pirax-test dev]$ mknod /dev/random c 1 9
mknod: ‘/dev/random’: Permission denied
[adam@RESCUE-pirax-test dev]$ exit
exit
[root@RESCUE-pirax-test dev]# mknod /dev/random c 1 9
[root@RESCUE-pirax-test dev]# su adam
[adam@RESCUE-pirax-test dev]$ ssh root@localhost
cannot read from /dev/urandom, No such file or directory
[adam@RESCUE-pirax-test dev]$ exit
exit
[root@RESCUE-pirax-test dev]# mknod /dev/urandom c 1 9
[root@RESCUE-pirax-test dev]# su adam
[adam@RESCUE-pirax-test dev]$ ssh root@localhost
Host key verification failed.
[adam@RESCUE-pirax-test dev]$ exit


Dec  7 09:23:55 pirax-test login: FAILED LOGIN 1 FROM tty1 FOR root, Authentication failure
Dec  7 09:32:00 pirax-test polkitd[1031]: Loading rules from directory /etc/polkit-1/rules.d
Dec  7 09:32:00 pirax-test polkitd[1031]: Loading rules from directory /usr/share/polkit-1/rules.d
Dec  7 09:32:00 pirax-test polkitd[1031]: Finished loading, compiling and executing 2 rules
Dec  7 09:32:00 pirax-test polkitd[1031]: Acquired the name org.freedesktop.PolicyKit1 on the system bus
Dec  7 09:32:10 pirax-test sshd[1375]: Server listening on 0.0.0.0 port 666.
Dec  7 09:32:10 pirax-test sshd[1375]: Server listening on :: port 666.
Dec  7 09:32:24 pirax-test unix_chkpwd[2692]: password check failed for user (root)
Dec  7 09:32:24 pirax-test login: pam_unix(login:auth): authentication failure; logname=LOGIN uid=0 euid=0 tty=tty1 ruser= rhost=  user=root
Dec  7 09:32:24 pirax-test login: pam_succeed_if(login:auth): requirement "uid >= 1000" not met by user "root"
Dec  7 09:32:27 pirax-test login: FAILED LOGIN 1 FROM tty1 FOR root, Authentication failure
Dec  7 09:32:32 pirax-test unix_chkpwd[2694]: password check failed for user (root)
Dec  7 09:32:32 pirax-test login: pam_succeed_if(login:auth): requirement "uid >= 1000" not met by user "root"
Dec  7 09:32:34 pirax-test login: FAILED LOGIN 2 FROM tty1 FOR root, Authentication failure
Dec  7 09:32:39 pirax-test unix_chkpwd[2696]: password check failed for user (root)
Dec  7 09:32:39 pirax-test login: pam_succeed_if(login:auth): requirement "uid >= 1000" not met by user "root"
Dec  7 09:32:41 pirax-test login: FAILED LOGIN SESSION FROM tty1 FOR root, Authentication failure
Dec  7 09:32:41 pirax-test login: PAM 2 more authentication failures; logname=LOGIN uid=0 euid=0 tty=tty1 ruser= rhost=  user=root
Dec  7 10:05:17 pirax-test polkitd[1029]: Loading rules from directory /etc/polkit-1/rules.d
Dec  7 10:05:17 pirax-test polkitd[1029]: Loading rules from directory /usr/share/polkit-1/rules.d
Dec  7 10:05:17 pirax-test polkitd[1029]: Finished loading, compiling and executing 2 rules
Dec  7 10:05:17 pirax-test polkitd[1029]: Acquired the name org.freedesktop.PolicyKit1 on the system bus
Dec  7 10:05:27 pirax-test sshd[1375]: Server listening on 0.0.0.0 port 666.
Dec  7 10:05:27 pirax-test sshd[1375]: Server listening on :: port 666.
Dec  7 10:09:56 pirax-test unix_chkpwd[2791]: password check failed for user (root)
Dec  7 10:09:56 pirax-test login: pam_unix(login:auth): authentication failure; logname=LOGIN uid=0 euid=0 tty=tty1 ruser= rhost=  user=root
Dec  7 10:09:56 pirax-test login: pam_succeed_if(login:auth): requirement "uid >= 1000" not met by user "root"
Dec  7 10:09:58 pirax-test login: FAILED LOGIN 1 FROM tty1 FOR root, Authentication failure
Dec  7 10:10:07 pirax-test unix_chkpwd[2802]: password check failed for user (root)
Dec  7 10:10:07 pirax-test login: pam_succeed_if(login:auth): requirement "uid >= 1000" not met by user "root"
Dec  7 10:10:09 pirax-test login: FAILED LOGIN 2 FROM tty1 FOR root, Authentication failure
[root@RESCUE-pirax-test /]# adduser adam
adduser: user 'adam' already exists
[root@RESCUE-pirax-test /]# passwd adam
Changing password for user adam.
New password:
Retype new password:
passwd: all authentication tokens updated successfully.
[root@RESCUE-pirax-test /]# passwd root
Changing password for user root.
New password:
Retype new password:
passwd: all authentication tokens updated successfully.
[root@RESCUE-pirax-test /]# su adam
[adam@RESCUE-pirax-test /]$ ssh adam@localhost
Host key verification failed.
[adam@RESCUE-pirax-test /]$ exit
exit
[root@RESCUE-pirax-test /]# vi /etc/ss
ssh/ ssl/
[root@RESCUE-pirax-test /]# vi /etc/ss
ssh/ ssl/
[root@RESCUE-pirax-test /]# vi /etc/ssh/
moduli                    ssh_config                sshd_config               ssh_host_ecdsa_key        ssh_host_ecdsa_key.pub    ssh_host_ed25519_key      ssh_host_ed25519_key.pub  ssh_host_rsa_key          ssh_host_rsa_key.pub
[root@RESCUE-pirax-test /]# vi /etc/ssh/sshd_config
[root@RESCUE-pirax-test /]# su adam
[adam@RESCUE-pirax-test /]$ ssh adam@localhost
Host key verification failed.
[adam@RESCUE-pirax-test /]$ exit
exit
[root@RESCUE-pirax-test /]# mknod -m 666 /dev/tty c 5 0
[root@RESCUE-pirax-test /]# su adam
[adam@RESCUE-pirax-test /]$ ssh adam@localhost
The authenticity of host 'localhost (::1)' can't be established.
ECDSA key fingerprint is e1:0c:0f:95:94:55:84:00:0d:e9:36:42:1d:6d:9e:0c.
Are you sure you want to continue connecting (yes/no)? yes
Warning: Permanently added 'localhost' (ECDSA) to the list of known hosts.
adam@localhost's password:

It’s still kind of broken, but its getting less broken by the minute ;-D

Comparing Files on the internet or CDN with MD5 to determine if they present same content

So, a customer today was having some issues with their CDN. They said that their SSL CDN was presenting a different image, than the HTTP CDN. So, I thought the best way to begin any troubleshooting process would firstly be to try and recreate those issues. To do that, I need a way to compare the files programmatically, enter md5sum a handly little shell application usually installed by default on most Linux OS.

[user@cbast3 ~]$ curl https://3485asd3jjc839c9d3-08e84cacaacfcebda9281e3a9724b749.ssl.cf3.rackcdn.com/companies/5825cb13f2e6c9632807d103/header.jpeg -o file ; cat file | md5sum
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  382k  100  382k    0     0  1726k      0 --:--:-- --:--:-- --:--:-- 1732k
e917a67bbe34d4eb2d4fe5a87ce90de0  -
[user@cbast3 ~]$ curl http://3485asd3jjc839c9d3-08e84cacaacfcebda9281e3a9724b749.r45.cf3.rackcdn.com/companies/5825cb13f2e6c9632807d103/header.jpeg -o file2 ; cat file2 | md5sum
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  382k  100  382k    0     0  2071k      0 --:--:-- --:--:-- --:--:-- 2081k
e917a67bbe34d4eb2d4fe5a87ce90de0  -

As we can see from the output of both, the md5sum (the hashing) of the two files is the same, this means there is a statistically very very very high chance the content is exactly the same, especially when passing several hundred characters or more. The hashing algorithm is combination based, so the more characters, the less likely same combination is of coming around twice!

In this case I was able to disprove the customers claim’s. Not because I wanted to, but because I wanted to solve their issue. These results show me, the issue must be, if it is with the CDN, with a local edgenode local to the customer having the issue. Since I am unable to recreate it from my location, it is therefore not unreasonable to assume that it is a client side issue, or a failure on our CDN edgenode side, local to the customer. That’s how I troubleshooted this, and quite happy with this one! Took about 2 minutes to do, and a few minutes to come up with. A quick and useful check indeed, which reduces the number of possibilities considerably in tracing down the issue!

Cheers &
Best wishes,
Adam

Please note the real CDN location has been altered for privacy reasons

How to limit the amount of memory httpd is using on CentOS 7 with Cgroups

CentOS 7, introduced something called CGroups, or control groups which has been in the stable kernel since about 2006. The systemD unit is made of several parts, systemD unit resource controllers can be used to limit memory usage of a service, such as httpd control group in systemD.

# Set Memory Limits for a systemD unit
systemctl set-property httpd MemoryLimit=500MB

# Get Limits for a systemD Unit
systemctl show -p CPUShares 
systemctl show -p MemoryLimit

Please note that OS level support is not generally provided with managed infrastructure service level, however I wanted to help where I could hear, and it shouldn’t be that difficult because the new stuff introduced in SystemD and CGroups is much more powerful and convenient than using ulimit or similar.

Aaron Mehar’s CBS to VHD solution for Rackspace Cloud

Hey. So another one of my colleagues put together this really awesome article. Although I was aware this could be done, he’s done a really good job or putting together the procedures, of turning your CBS BFV (boot from (network) volume) disk into a VHD file.

Rackspace CBS disks works over iscsi and are presented via the network. The difference between instance store on the hypervisor, (utilized by cloud-server images), and the disk store on the CBS is that the CBS disk is not a VHD, but an disk presented over network via iscsi.

So, to take a VHD, or an equivalent cloud-server image snapshot, you need to image the disk manually, as well as convert it to VHD.

Taking an image of a volume is not possible, and would not be downloadable. However there are some workarounds that can be done.

*** Please NOTE ****
This is not supported, and we can not assist beyond these instructions. I could provide some clarity if required, however, my collegaues may not be able to help should I become unavailable.

If you just want the data, then you could just download the data to your local machine, however, if you a VHD to create a local VM, then the below instructions will achieve this.

Steps

Please take special care, making a mistake working with partitioner can wipe all your data

1. Shutdown the server
2. Clone the disk, by Starting a volume clone and start the server back up.
3. Attach the newly created clone to the server
4. create another new CBS volume of a slightly larger size (+5GB is OK)

Now that is done, we can image the disk. You will need to ensure you have the corrects disks. The second disk with data should be xvbd and the new CBS should be xvdc

Create partition and filesystem for xvdbc. Please see this guide: https://support.rackspace.com/how-to/prepare-your-cloud-block-storage-volume/

the image xvdb to xvdc

   dd if=/dev/xvdb of=/mnt/cbsvolume1/myimage.dd

The download the image to your workstation, and install VirtualBox, and run the below command

   VBoxManage convertfromraw myfile.dd myfile.vhd --format VHD

Please take special care, making a mistake working with partitioner can wipe all your data

Fixing nova-agent bugs caused by yum update

1. Download newest version of nova-agent from the github repo to the server you want to upgrade : https://github.com/rackerlabs/openstack-guest-agents-unix/releases

For this instance I used “nova-agent-1.39-1.x86_64.rpm” since it’s CentOS / Redhat based

2. Stop current nova-agent service

service nova-agent stop

3. Remove current in place nova-agent. I found the easiest way to do this is to just remove the entire contents of it’s directory.

rm -rf /usr/share/nova-agent/*

4. Install the new nova-agent with the RPM

rpm -ivh --nosignature nova-agent-1.39-1.x86_64.rpm

5. Start the new nova-agent service

service nova-agent start (might need to use systemd for CentOS/RHEL 7 and above)
# ie 
systemctl enable nova-agent
systemctl start nova-agent

6. Issue a networking reset to verify it is working. Check logs and verify that you see this message : ‘resetnetwork’ completed with code ‘0’, message ”

uuid=$(uuidgen)
xenstore-write data/host/$uuid '{"name":"resetnetwork","value":""}'

tail -20 /var/log/nova-agent.log

That’s it! Once you’ve done that you should reboot to verify that the nova-agent comes up on boot but otherwise nothing else is needed. Hope it helps!

Thanks to Sean from Rackspace for this.. you rock dudery.

Killing a stuck sr-scan when vdi gets suck in vdi_deactivate

Simple fix for this one, kill the task directly by it’s PID. Using ps , grep and sed, to select correct process.

When executing this in live production, take especial care.

# xe task-list 
uuid ( RO)                : 7cb8b30b-563c-db9e-7682-e25aac2b2f07
          name-label ( RO): SR.scan
    name-description ( RO): 
              status ( RO): pending
            progress ( RO): 0.000
[root@21-16-246-494694 ~]# date
Thu Nov 10 11:41:45 UTC 2016
[root@xenhost ~]# kill -TERM $(ps fauxww | grep "[t]ap-ctl close" | sed -e "s/.*-p //" | awk '{print $1}')

Help! I can’t login to my cloud-server even though I’ve reset my root password

The most common cause of this is the permit root login is set to no, although there might be other causes, like a really broken sshd_config, instead of just one variable. The procedure for looking into this is pretty much the same regardless of the breakage that has occurred. Here is what you need to do:

Here’s the full procedure:

1) Put server into rescue mode.
2) Login to cloud-server on SSH port, please note rescue mode gives you a new temporary root password allowing you to reset the password for SSH on the ‘original disk’.
3) once logged in mount the /dev/xvdb devices, this may be /dev/xvdb1 or /dev/xvdb2 but is usually /dev/xvdb1 and chroot (change root to the ‘original disk’)

# Mount old disk
mnt /dev/xvdb1 /mnt

# Change to the ‘old disk’
chroot /mnt

# Set the new password for root on the old disk:

passwd
# enter the new password when prompted

and specifically ensure that /etc/ssh/sshd_config has this line:

PermitRootLogin no

changed to:

PermitRootLogin yes

Your developer or sysad won’t be able to login until you reset the root password here, and if you do not know the username to su to root from, it is absolutely critical to perform this work, otherwise you won’t be able to access the server.

Also, once you have allowed the root login, and changed the password to something you recognise you will be able to exit rescue mode thru the control panel and login to the machine as normal.

For more detail about how to do this (although all the steps are here pretty much, please see):

https://support.rackspace.com/how-to/rackspace-cloud-essentials-rescue-mode-on-linux-cloud-servers/

I hope this helps you folks out some,

All About NOVA and Xen Tools in Rackspace Cloud – why can’t I connect to my Windows server?

Why can’t I connect to my Rackspace Windows cloud-server, you ask? 2 important questions.

1. Is it a new build?
2. Is it using a custom image (a non rackspace base image).

(because the rackspace base images all have correct nova-agent and xen tools, so get networking information OK. But customer images don’t!). In the case you have run the below tests to see if nova-agent is running (or installed), you will need to install them.

Checking for the nova-agent and xe-guest-utilities

ps auxfwww | grep nova-agent
yum -qa xe-guest-utilities nova-agent
dpkg -l xe-guest-utilities nova-agent

Explanation and solution

Thanks for reaching out to us with your inquiry today. I’m glad to convey to you that I understand what the problem is with your cloud-server not being contactable.

Main reasons for breakage

The main reason why this is not working is most likely caused by some important pieces of software being missing. There is a piece of software called nova-agent, which is responsible for setting your cloud-servers IPV4 address, network subnet/mask, and ip routes, when it is first built. This is important, since the server image you built the server from, has different network details.

The rackspace build process giving networking detail to the VM is completely dependent on xe-guest-utilities and nova-agent

What has happened in this case, because the nova-agent wasn’t running on the cloud-server, the hypervisor software Rackspace use to automate cloud-server builds wasn’t able to contact the nova-agent running on your cloud-server, and therefore nova-agent wasn’t able to update the networking information. And hence, your not able to connect to it on it’s IPv4 address you are given at build time.

The steps to resolution: installing nova-agent and xen guest utilities
As such, nova-agent needs to be installed on the cloud-server you take the image from, it can be installed as follows:

https://community.rackspace.com/products/f/25/t/5694

Also nova-agent uses another piece of important software called xe-guest-utilities, or (Xen Tools) for your windows servers, this is an important ‘PV’ paravirtualization tools, responsible for seamless management of cloud-servers. Sorry that in this case it’s not working out seamlessly, but this can happen with images taken of servers which have had nova-agent disabled, uninstalled, or similar.

Upgrading the tools that nova-agent depends upon, can be installed by following the instructions at the following location:

https://support.rackspace.com/how-to/upgrade-citrix-xen-server-tools-for-windows-cloud-servers/

# Options of how to do this / Summary of Solution Steps

Naturally, you might be wondering how to achieve these changes, if you cannot RDP to the server. This is quite understandable, there are two ways to get this working;

Option 1) Manually install nova-agent on the current server you cannot access, then manually install the Xen Tools in the same way. This shall fix the OS on the server itself, and not the original image you built the server from. So it is important to create a new cloud-server image after performing these steps and us verifying tools + nova-agent installed correctly.

2) Manually install nova-agent on the source server you initially taken the image from, and install Xen Tools, then re-image the server, and then re-deploy. This should seamlesssly work each time on build with that image, provided the tools are installed. You will not need to recreate the image, since your fixing the problem on the cloud-server source that the original image was taken from.

I appreciate that these things are not 100% simple to get your head around and can be confusing for customers, I hope my explanation and summary makes this a little more painless to fix. Of course if you have additional questions, comments or concerns or don’t understand something I’ve said, please don’t hesitate to reach out to us, we are here to help!

Creating a proper Method of Retrieving, Sorting, and Parsing Rackspace CDN Access Logs

So, this has been rather a bane on the life which is lived as Adam Bull. Basically, a large customer of ours had 50+ CDN’s, and literally hundreds of gigabytes of Log Files. They were all in Rackspace Cloud Files, and the big question was ‘how do I know how busy my CDN is?’.

screen-shot-2016-11-07-at-12-41-30-pm

This is a remarkably good question, because actually, not many tools are provided here, and the customer will, much like on many other CDN services, have to download those logs, and then process them. But that is actually not easier either, and I spent a good few weeks (albeit when I had time), trying to figure out the best way to do this. I dabbled with using tree to display the most commonly used logs, I played with piwik, awstats, and many others such as goaccess, all to no avail, and even used a sophisticated AWK script from our good friends in Operations. No luck, nothing, do not pass go, or collect $200. So, I was actually forced to write something to try and achieve this, from start to finish. There are 3 problems.

1) how to easily obtain .CDN_ACCESS_LOGS from Rackspace Cloud Files to Cloud Server (or remote).
2) how to easily process these logs, in which format.
3) how to easily present these logs, using which application.

The first challenge was actually retrieving the files.

swiftly --verbose --eventlet --concurrency=100 get .CDN_ACCESS_LOGS --all-objects -o ./

Naturally to perform this step above, you will need a working, and setup swiftly environment. If you don’t know what swiftly, is or understand how to set up a swiftly envrionment, please see this article I wrote on the subject of deleting all files with swiftly (The howto explains the environment setup first! Just don’t follow the article to the end, and continue from here, once you’ve setup and installed swiftly)

Fore more info see:
https://community.rackspace.com/products/f/25/t/7190

Processing the Rackspace CDN Logs that we’ve downloaded, and organising them for further log processing
This required a lot more effort, and thought

The below script sits in the same folder as all of the containers

# ls -al 
total 196
drwxrwxr-x 36 root root  4096 Nov  7 12:33 .
drwxr-xr-x  6 root root  4096 Nov  7 12:06 ..
# used by my script
-rw-rw-r--  1 root root  1128 Nov  7 12:06 alldirs.txt

# CDN Log File containers as we downloaded them from swiftly Rackspace Cloud Files (.CDN_ACCESS_LOGS)
drwxrwxr-x  3 root root  4096 Oct 19 11:22 dev.demo.video.cdn..com
drwxrwxr-x  3 root root  4096 Oct 19 11:22 europe.assets.lon.tv
drwxrwxr-x  5 root root  4096 Oct 19 11:22 files.lon.cdn.lon.com
drwxrwxr-x  3 root root  4096 Oct 19 11:23 files.blah.cdn..com
drwxrwxr-x  5 root root  4096 Oct 19 11:24 files.demo.cdn..com
drwxrwxr-x  3 root root  4096 Oct 19 11:25 files.invesco.cdn..com
drwxrwxr-x  3 root root  4096 Oct 19 11:25 files.test.cdn..com
-rw-r--r--  1 root root   561 Nov  7 12:02 generate-report.sh
-rwxr-xr-x  1 root root  1414 Nov  7 12:15 logparser.sh

# Used by my script
drwxr-xr-x  2 root root  4096 Nov  7 12:06 parsed
drwxr-xr-x  2 root root  4096 Nov  7 12:33 parsed-combined
#!/bin/bash

# Author : Adam Bull
# Title: Rackspace CDN Log Parser
# Date: November 7th 2016

echo "Deleting previous jobs"
rm -rf parsed;
rm -rf parsed-combined

ls -ld */ | awk '{print $9}' | grep -v parsed > alldirs.txt


# Create Location for Combined File Listing for CDN LOGS
mkdir parsed

# Create Location for combined CDN or ACCESS LOGS
mkdir parsed-combined

# This just builds a list of the CDN Access Logs
echo "Building list of Downloaded .CDN_ACCESS_LOG Files"
sleep 3
while read m; do
folder=$(echo "$m" | sed 's@/@@g')
echo $folder
        echo "$m" | xargs -i find ./{} -type f -print > "parsed/$folder.log"
done < alldirs.txt

# This part cats the files and uses xargs to produce all the Log oiutput, before cut processing and redirecting to parsed-combined/$folder
echo "Combining .CDN_ACCESS_LOG Files for bulk processing and converting into NCSA format"
sleep 3
while read m; do
folder=$(echo "$m" | sed 's@/@@g')
cat "parsed/$folder.log" | xargs -i zcat {} | cut -d' ' -f1-10  > "parsed-combined/$folder"
done < alldirs.txt


# This part processes the Log files with Goaccess, generating HTML reports
echo "Generating Goaccess HTML Logs"
sleep 3
while read m; do
folder=$(echo "$m" | sed 's@/@@g')
goaccess -f "parsed-combined/$folder" -a -o "/var/www/html/$folder.html"
done < alldirs.txt

How to easily present these logs

I kind of deceived you with the last step. Actually, because I have already done it, with the above script. Though, you will naturally need to have an httpd installed, and a documentroot in /var/www/html, so make sure you install apache2:

yum install httpd awstats

De de de de de de da! da da!

screen-shot-2016-11-07-at-12-41-30-pm

Some little caveats:

Generating a master index.html file of all the sites


[root@cdn-log-parser-mother html]# pwd
/var/www/html
[root@cdn-log-parser-mother html]# ls -al | awk '{print $9}' | xargs -i echo " {}
" > index.html

I will expand the script to generate this automatically soon, but for now, leaving like this due to time constraints.

Enabling Automatic Security Updates in CentOS 6, 7 and RHEL 6 and RHEL 7 (and Debian and Ubuntu too)

yum -y install yum-cron

This can also be done on Debian and Ubuntu systems if you are feeling left out:

apt-get -y install unattended-upgrades

Configuration on the CentOS/RHEL side is:

da da da da da da da ! Actually that’s all you need to do to enable it, but there are a lot of things you can customise in /etc/yum/yum-cron.conf

[commands]
#  What kind of update to use:
# default                            = yum upgrade
# security                           = yum --security upgrade
# security-severity:Critical         = yum --sec-severity=Critical upgrade
# minimal                            = yum --bugfix update-minimal
# minimal-security                   = yum --security update-minimal
# minimal-security-severity:Critical =  --sec-severity=Critical update-minimal
update_cmd = default

# Whether a message should be emitted when updates are available,
# were downloaded, or applied.
update_messages = yes

# Whether updates should be downloaded when they are available.
download_updates = yes

# Whether updates should be applied when they are available.  Note
# that download_updates must also be yes for the update to be applied.
apply_updates = no

# Maximum amout of time to randomly sleep, in minutes.  The program
# will sleep for a random amount of time between 0 and random_sleep
# minutes before running.  This is useful for e.g. staggering the
# times that multiple systems will access update servers.  If
# random_sleep is 0 or negative, the program will run immediately.
# 6*60 = 360
random_sleep = 360


[emitters]
# Name to use for this system in messages that are emitted.  If
# system_name is None, the hostname will be used.
system_name = None

# How to send messages.  Valid options are stdio and email.  If
# emit_via includes stdio, messages will be sent to stdout; this is useful
# to have cron send the messages.  If emit_via includes email, this
# program will send email itself according to the configured options.
# If emit_via is None or left blank, no messages will be sent.
emit_via = stdio

# The width, in characters, that messages that are emitted should be
# formatted to.
ouput_width = 80


[email]
# The address to send email messages from.
email_from = root@localhost

# List of addresses to send messages to.
email_to = root

# Name of the host to connect to to send email messages.
email_host = localhost


[groups]
# NOTE: This only works when group_command != objects, which is now the default
# List of groups to update
group_list = None

# The types of group packages to install
group_package_types = mandatory, default

[base]
# This section overrides yum.conf

# Use this to filter Yum core messages
# -4: critical
# -3: critical+errors
# -2: critical+errors+warnings (default)
debuglevel = -2

# skip_broken = True
mdpolicy = group:main

# Uncomment to auto-import new gpg keys (dangerous)
# assumeyes = True